{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "db930060",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"OPENAI_API_KEY\"] = '你的OpenAI API Key'\n",
    "\n",
    "# 1.Load 导入Document Loaders\n",
    "from langchain.document_loaders import PyPDFLoader\n",
    "from langchain.document_loaders import Docx2txtLoader\n",
    "from langchain.document_loaders import TextLoader\n",
    "\n",
    "# 加载Documents\n",
    "documents = []\n",
    "for file in os.listdir('OneFlower'): \n",
    "    if file.endswith('.pdf'):\n",
    "        pdf_path = './OneFlower/' + file\n",
    "        loader = PyPDFLoader(pdf_path)\n",
    "        documents.extend(loader.load())\n",
    "    elif file.endswith('.docx') or file.endswith('.doc'):\n",
    "        doc_path = './OneFlower/' + file\n",
    "        loader = Docx2txtLoader(doc_path)\n",
    "        documents.extend(loader.load())\n",
    "    elif file.endswith('.txt'):\n",
    "        text_path = './OneFlower/' + file\n",
    "        loader = TextLoader(text_path)\n",
    "        documents.extend(loader.load())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9febbc05",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2.Split 将Documents切分成块以便后续进行嵌入和向量存储\n",
    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
    "text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, \n",
    "                                               chunk_overlap=10)\n",
    "chunked_documents = text_splitter.split_documents(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "4d9800ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3.Store 将分割嵌入并存储在矢量数据库Qdrant中\n",
    "from langchain.vectorstores import Qdrant\n",
    "from langchain.embeddings import OpenAIEmbeddings\n",
    "vectorstore = Qdrant.from_documents(\n",
    "    documents=chunked_documents, # 以分块的文档\n",
    "    embedding=OpenAIEmbeddings(), # 用OpenAI的Embedding Model做嵌入\n",
    "    location=\":memory:\",  # in-memory 存储\n",
    "    collection_name=\"my_documents\",) # 指定collection_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "55bb2903",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4. Retrieval 准备模型和Retrieval链\n",
    "import logging # 导入Logging工具\n",
    "from langchain.chat_models import ChatOpenAI # ChatOpenAI模型\n",
    "from langchain.retrievers.multi_query import MultiQueryRetriever # MultiQueryRetriever工具\n",
    "from langchain.chains import RetrievalQA # RetrievalQA链\n",
    "\n",
    "# 设置Logging\n",
    "logging.basicConfig()\n",
    "logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)\n",
    "\n",
    "# 实例化一个大模型工具 - OpenAI的GPT-3.5\n",
    "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
    "\n",
    "# 实例化一个MultiQueryRetriever\n",
    "retriever_from_llm = MultiQueryRetriever.from_llm(retriever=vectorstore.as_retriever(), llm=llm)\n",
    "\n",
    "# 实例化一个RetrievalQA链\n",
    "qa_chain = RetrievalQA.from_chain_type(llm,retriever=vectorstore.as_retriever())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "610edc60",
   "metadata": {},
   "outputs": [
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "请随意提问 (或者输入'exit'退出):  易速鲜花的董事长致辞，总结一下\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "答案: {'query': '易速鲜花的董事长致辞，总结一下', 'result': '董事长在致辞中表示热烈欢迎新员工加入易速鲜花集团，并为公司能拥有优秀员工感到自豪和骄傲。他对员工即将为公司付出的努力表示诚挚的感谢，并表示易速鲜花将会因员工的努力工作而稳步健康发展，因员工的贡献而更加精彩。'}\n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "请随意提问 (或者输入'exit'退出):  董事长致辞中的企业精神具体是什么？\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "答案: {'query': '董事长致辞中的企业精神具体是什么？', 'result': '企业精神在董事长致辞中被描述为\"团结向上，完美无缺\"。这意味着易速鲜花希望每一位员工都能以高度的主人翁责任感和使命感，积极发扬团结合作的精神，在各自的岗位上勤奋敬业，尽职尽责，奋力拼搏。同时，易速鲜花也强调员工之间应该默契配合，相互接纳，取长补短，共同奋进，同舟共济。这样，易速鲜花可以成为一个坚强的战斗堡垒，在发展的道路上克服困难，迎接挑战。'}\n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "请随意提问 (或者输入'exit'退出):  谢谢\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "答案: {'query': '谢谢', 'result': '您好，欢迎光临易速鲜花！请问您需要帮助吗？'}\n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "请随意提问 (或者输入'exit'退出):  exit\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "谢谢使用 QA 系统!\n"
     ]
    }
   ],
   "source": [
    "# 5. 问答展示\n",
    "def ask_question(query):\n",
    "    # 使用RetrievalQA链来获取答案\n",
    "    response = qa_chain(query)\n",
    "    \n",
    "    # 返回得到的答案\n",
    "    return response\n",
    "\n",
    "# 为用户提供交互界面进行问答\n",
    "while True:\n",
    "    # 获取用户的问题\n",
    "    user_query = input(\"请随意提问 (或者输入'exit'退出): \")\n",
    "\n",
    "    # 如果用户输入\"exit\"，则退出循环\n",
    "    if user_query.lower() == 'exit':\n",
    "        break\n",
    "\n",
    "    # 使用定义的函数获取答案，并打印\n",
    "    answer = ask_question(user_query)\n",
    "    print(\"答案:\", answer)\n",
    "\n",
    "print(\"谢谢使用 QA 系统!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dae2e3d1-4bfb-48e4-96d6-292a1d8bcedd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
